#!/bin/bash
#
# Launch fairseq training of a transformer on the IWSLT14 De-En dataset,
# with the pruning options listed at the end of the argument list.
#
# Run from the directory that contains ./fairseq_cli/train.py; every path
# below is relative to the current working directory:
#   reads    data-bin/$DATASET
#   creates  checkpoints/$DATASET
#
# Strict mode (set -euo pipefail) is intentionally NOT enabled: ~/.bashrc is
# sourced below and user rc files / conda activation hooks commonly reference
# unset variables or run commands that return non-zero. The critical steps
# are checked explicitly instead.

DATASET=iwslt14.tokenized.de-en

# ~/.bashrc is sourced so that the `conda` shell function becomes available.
# Its return status is whatever its last command returned, so it is not
# checked here; a broken setup is caught by the activation check below.
# shellcheck source=/dev/null
source ~/.bashrc

# Abort rather than train with whatever `python` happens to be on PATH.
conda activate fairseq || {
  printf 'error: could not activate conda environment "fairseq"\n' >&2
  exit 1
}

mkdir -p -- "checkpoints/$DATASET" || {
  printf 'error: could not create checkpoints/%s\n' "$DATASET" >&2
  exit 1
}
# NOTE(review): no --save-dir is passed to train.py below, so unless train.py
# defaults to checkpoints/$DATASET the checkpoints may be written elsewhere.
# Confirm whether `--save-dir "checkpoints/$DATASET"` was intended.

# Arguments are kept in an array so each one reaches train.py as exactly one
# word (the quoted tuple / JSON values in particular) without relying on
# backslash line continuations.
train_args=(
  # Positional argument: data directory.
  "data-bin/$DATASET"

  # Model.
  --arch transformer
  --share-decoder-input-output-embed

  # Optimizer and learning-rate schedule.
  --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0
  --lr 5e-4 --lr-scheduler inverse_sqrt --warmup-updates 4000

  # Regularization and loss.
  --dropout 0.3 --weight-decay 0.0001
  --criterion label_smoothed_cross_entropy --label-smoothing 0.1

  # Batching.
  --max-tokens 512

  # BLEU evaluation; the best checkpoint is selected by (maximized) BLEU.
  --eval-bleu --ddp-backend=no_c10d
  --eval-bleu-args '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}'
  --eval-bleu-detok moses
  --eval-bleu-remove-bpe
  --eval-bleu-print-samples
  --best-checkpoint-metric bleu --maximize-best-checkpoint-metric

  # Pruning options.
  # NOTE(review): these underscore-style flags are presumably defined by this
  # checkout's train.py rather than stock fairseq; verify their semantics
  # there.
  --prune_start_step 50 --target_sparsity 0.5
  --pruning_interval 10 --num_pruning_steps 200
  --prune_type magnitude

  # Stopping criterion.
  --max-update 50000
)

# Last command: the script's exit status is the training run's exit status.
python ./fairseq_cli/train.py "${train_args[@]}"
